From df7b656e4e6b0cfe03713fa71c2ab4c3055aa327 Mon Sep 17 00:00:00 2001
From: =?utf8?q?=C3=98yvind=20Kol=C3=A5s?= <pippin@gimp.org>
Date: Sun, 1 Apr 2012 03:08:53 +0100
Subject: [PATCH] extensions: add lut based gamma for float

---
 extensions/Makefile.am  |   3 +-
 extensions/fast-float.c | 463 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 465 insertions(+), 1 deletion(-)
 create mode 100644 extensions/fast-float.c

diff --git a/extensions/Makefile.am b/extensions/Makefile.am
index c017046..4d2d4cb 100644
--- a/extensions/Makefile.am
+++ b/extensions/Makefile.am
@@ -22,6 +22,7 @@ ext_LTLIBRARIES = \
 	gggl.la         \
 	gimp-8bit.la    \
 	float.la    \
+	fast-float.la    \
 	naive-CMYK.la   \
 	sse-fixups.la
 
@@ -34,8 +35,8 @@ gimp_8bit_la_SOURCES = gimp-8bit.c
 naive_CMYK_la_SOURCES = naive-CMYK.c
 sse_fixups_la_SOURCES = sse-fixups.c
 float_la_SOURCES = float.c
+fast_float_la_SOURCES = fast-float.c
 
 LIBS = $(top_builddir)/babl/libbabl-@BABL_API_VERSION@.la $(MATH_LIB)
 
 sse_fixups_la_CFLAGS = $(MMX_EXTRA_CFLAGS) $(SSE_EXTRA_CFLAGS)
-float_la_CFLAGS = -std=c99
diff --git a/extensions/fast-float.c b/extensions/fast-float.c
new file mode 100644
index 0000000..2d02931
--- /dev/null
+++ b/extensions/fast-float.c
@@ -0,0 +1,463 @@
+/* babl - dynamically extendable universal pixel conversion library.
+ * Copyright (C) 2012, Øyvind Kolås
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General
+ * Public License along with this library; if not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "babl.h"
+#include "babl-cpuaccel.h"
+#include "extensions/util.h"
+#include "base/util.h"
+
+#define INLINE inline
+
+/* Callback that computes the exact result for `value`; `data` is the lookup's opaque pointer. */
+typedef  float (* BablLookupFunction) (float  value,
+                                       void  *data);
+#define babl_LOOKUP_MAX_ENTRIES   (819200)  /* cap on cached entries; also sizes BablLookup.bitmask */
+/* Lazily filled cache of a float function, indexed by the argument's bit pattern >> shift. */
+typedef struct BablLookup
+{
+  BablLookupFunction function;  /* called on a cache miss or for out-of-range values */
+  void              *data;      /* passed unchanged as the second argument of function */
+  int               shift;      /* number of low bits dropped from the float's bit pattern */
+  unsigned int            positive_min, positive_max, negative_min, negative_max;  /* exclusive bounds of the two cached index ranges */
+  unsigned int            bitmask[babl_LOOKUP_MAX_ENTRIES/32];  /* one bit per table entry, set once it is computed */
+  float             table[];    /* cached results (flexible array member) */
+} BablLookup;
+
+/* Forward declaration; the definition follows babl_lookup () further down. */
+static BablLookup *babl_lookup_new (BablLookupFunction  function,
+                                    void *              data,
+                                    float               start,
+                                    float               end,
+                                    float               precision);
+#if 0  /* compiled out, like the matching definition further down */
+static void        babl_lookup_free      (BablLookup         *lookup);
+#endif
+
+
+/* Return lookup->function (number), cached per (bit pattern of number >> shift).
+ * Values outside the cached ranges are passed straight to the function. */
+static inline float
+babl_lookup (BablLookup *lookup, float number)
+{
+  union { float f; unsigned int i; } u;  /* reinterpret the float's bits */
+  unsigned int i;
+
+  u.f = number;
+  i = u.i >> lookup->shift;  /* floats differing only in the dropped bits share an index */
+
+  /* Both range tests are exclusive at each end. */
+  if (i > lookup->positive_min &&
+      i < lookup->positive_max)
+    i = i - lookup->positive_min;
+  else if (i > lookup->negative_min &&
+           i < lookup->negative_max)
+    /* the second range is stored directly after the first one in table[] */
+    i = i - lookup->negative_min + (lookup->positive_max - lookup->positive_min);
+  else
+    return lookup->function (number, lookup->data);
+
+  /* 1u, not 1: shifting a signed 1 into bit 31 is undefined behaviour */
+  if (!(lookup->bitmask[i/32] & (1u << (i & 31))))
+    {
+      lookup->table[i]= lookup->function (number, lookup->data);
+      lookup->bitmask[i/32] |= (1u << (i & 31));
+    }
+
+  return lookup->table[i];
+}
+
+/* Create an empty lookup caching `function` roughly between `start` and `end`.
+ * A coarser `precision` drops more low bits of each float (fewer entries).
+ * Returns NULL if the allocation fails. */
+static BablLookup *
+babl_lookup_new (BablLookupFunction function,
+                 void *             data,
+                 float              start,
+                 float              end,
+                 float              precision)
+{
+  BablLookup *lookup;
+  union { float f; unsigned int i; } u;  /* float <-> bit pattern */
+  int positive_min, positive_max, negative_min, negative_max;
+  int shift;
+
+  /* normalize input parameters */
+  if (start > end)
+    { /* swap */
+      u.f = start;
+      start = end;
+      end = u.f;
+    }
+
+       if (precision <= 0.000005) shift =  0; /* checked for later */
+  else if (precision <= 0.000010) shift =  8;
+  else if (precision <= 0.000020) shift =  9;
+  else if (precision <= 0.000040) shift = 10;
+  else if (precision <= 0.000081) shift = 11;
+  else if (precision <= 0.000161) shift = 12;
+  else if (precision <= 0.000324) shift = 14; /* NOTE(review): 13 is skipped - confirm intended */
+  else if (precision <= 0.000649) shift = 15;
+  else shift = 16; /* a bit better than 8bit sRGB quality */
+
+  /* Adjust slightly away from 0.0, saving many entries close to 0, this
+   * causes lookups very close to zero to be passed directly to the
+   * function instead.
+   */
+  if (start == 0.0)
+    start = precision;
+  if (end == 0.0)
+    end = -precision;
+
+  /* Compute the shifted bit patterns that bound the cached range(s). */
+  if (start < 0.0 || end < 0.0)
+    {
+      if (end < 0.0)
+        {
+          u.f = start;
+          positive_max = u.i >> shift;
+          u.f = end;
+          positive_min = u.i >> shift;
+          negative_min = positive_max;
+          negative_max = positive_max;
+        }
+      else
+        {
+          u.f = 0 - precision;
+          positive_min = u.i >> shift;
+          u.f = start;
+          positive_max = u.i >> shift;
+
+          u.f = 0 + precision;
+          negative_min = u.i >> shift;
+          u.f = end;
+          negative_max = u.i >> shift;
+        }
+    }
+  else
+    {
+      u.f = start;
+      positive_min = u.i >> shift;
+      u.f = end;
+      positive_max = u.i >> shift;
+      negative_min = positive_max;
+      negative_max = positive_max;
+    }
+
+  if (shift == 0) /* short circuit, do not use ranges */
+    {
+      positive_min = positive_max = negative_min = negative_max = 0;
+    }
+
+  if ((positive_max-positive_min) + (negative_max-negative_min) > babl_LOOKUP_MAX_ENTRIES)
+    {
+      /* Reduce the size of the cache tables to fit within the bitmask
+       * budget of babl_LOOKUP_MAX_ENTRIES entries: trim the second range
+       * first, then the first range. */
+
+      int diff = (positive_max-positive_min) + (negative_max-negative_min) - babl_LOOKUP_MAX_ENTRIES;
+
+      if (negative_max - negative_min > 0)
+        {
+          if (negative_max - negative_min >= diff)
+            {
+              negative_max -= diff;
+              diff = 0;
+            }
+          else
+            {
+              diff -= negative_max - negative_min;
+              negative_max = negative_min;
+            }
+        }
+      if (diff)
+        positive_max-=diff;
+    }
+
+  lookup = calloc (sizeof (BablLookup) + sizeof (float) *
+                                                  ((positive_max-positive_min)+
+                                                   (negative_max-negative_min)), 1);
+  if (!lookup)
+    return NULL;
+
+  lookup->positive_min = positive_min;
+  lookup->positive_max = positive_max;
+  lookup->negative_min = negative_min;
+  lookup->negative_max = negative_max;
+  lookup->shift = shift;
+  lookup->function = function;
+  lookup->data = data;
+
+  return lookup;
+}
+
+static BablLookup *fast_pow = NULL;  /* linear -> gamma cache; assigned in init () */
+/* BablLookupFunction adapter around linear_to_gamma_2_2 (); userdata is unused. */
+static inline float core_lookup (float val, void *userdata)
+{
+  return linear_to_gamma_2_2 (val);
+}
+/* Cached linear_to_gamma_2_2 (): looks val up in fast_pow, which must already be set. */
+static float
+linear_to_gamma_2_2_lut (float val)
+{
+  return babl_lookup (fast_pow, val);
+}
+
+
+static BablLookup *fast_rpow = NULL;  /* gamma -> linear cache; assigned in init () */
+/* BablLookupFunction adapter around gamma_2_2_to_linear (); userdata is unused. */
+static inline float core_rlookup (float val, void *userdata)
+{
+  return gamma_2_2_to_linear (val);
+}
+/* Cached gamma_2_2_to_linear (): looks val up in fast_rpow, which must already be set. */
+static float
+gamma_2_2_to_linear_lut (float val)
+{
+  return babl_lookup (fast_rpow, val);
+}
+
+#if 0  /* compiled out: nothing in this file calls babl_lookup_free () */
+static void
+babl_lookup_free (BablLookup *lookup)
+{
+  free (lookup);  /* struct and table[] come from a single calloc () */
+}
+#endif
+
+/* Linear RGBA float -> gamma-encoded R'aG'aB'aA float, colour multiplied by alpha.
+ * Converts `samples` 4-float pixels from src to dst and returns samples. */
+static INLINE long
+conv_rgbaF_linear_rgbAF_gamma (unsigned char *src, unsigned char *dst, long samples)
+{
+   float *fsrc = (float *) src;
+   float *fdst = (float *) dst;
+   long n = samples;  /* long, not int: an int copy would truncate large counts */
+
+   while (n--)
+     {
+       float alpha = fsrc[3];
+       *fdst++ = linear_to_gamma_2_2_lut (*fsrc++) * alpha;
+       *fdst++ = linear_to_gamma_2_2_lut (*fsrc++) * alpha;
+       *fdst++ = linear_to_gamma_2_2_lut (*fsrc++) * alpha;
+       *fdst++ = *fsrc++;
+     }
+  return samples;
+}
+
+/* Linear RaGaBaA float -> gamma-encoded R'aG'aB'aA float (both alpha-multiplied).
+ * Converts `samples` 4-float pixels from src to dst and returns samples. */
+static INLINE long
+conv_rgbAF_linear_rgbAF_gamma (unsigned char *src, unsigned char *dst, long samples)
+{
+   float *fsrc = (float *) src;
+   float *fdst = (float *) dst;
+   long n = samples;  /* long, not int: an int copy would truncate large counts */
+
+   while (n--)
+     {
+       float alpha = fsrc[3];
+       if (alpha < BABL_ALPHA_THRESHOLD)  /* too transparent to divide by: emit zeros */
+         {
+           *fdst++ = 0.0;
+           *fdst++ = 0.0;
+           *fdst++ = 0.0;
+           *fdst++ = 0.0;
+           fsrc+=4;
+         }
+       else if (alpha >= 1.0)  /* no un-/re-multiplication in this case */
+         {
+           *fdst++ = linear_to_gamma_2_2_lut (*fsrc++);
+           *fdst++ = linear_to_gamma_2_2_lut (*fsrc++);
+           *fdst++ = linear_to_gamma_2_2_lut (*fsrc++);
+           *fdst++ = *fsrc++;
+         }
+       else  /* divide by alpha, gamma-encode, multiply by alpha again */
+         {
+           float alpha_recip = 1.0 / alpha;
+           *fdst++ = linear_to_gamma_2_2_lut (*fsrc++ * alpha_recip) * alpha;
+           *fdst++ = linear_to_gamma_2_2_lut (*fsrc++ * alpha_recip) * alpha;
+           *fdst++ = linear_to_gamma_2_2_lut (*fsrc++ * alpha_recip) * alpha;
+           *fdst++ = *fsrc++;
+         }
+     }
+  return samples;
+}
+
+/* Linear RGBA float -> gamma-encoded R'G'B'A float; alpha is copied unchanged.
+ * Converts `samples` 4-float pixels from src to dst and returns samples. */
+static INLINE long
+conv_rgbaF_linear_rgbaF_gamma (unsigned char *src, unsigned char *dst, long samples)
+{
+   float *fsrc = (float *) src;
+   float *fdst = (float *) dst;
+   long n = samples;  /* long, not int: an int copy would truncate large counts */
+
+   while (n--)
+     {
+       *fdst++ = linear_to_gamma_2_2_lut (*fsrc++);
+       *fdst++ = linear_to_gamma_2_2_lut (*fsrc++);
+       *fdst++ = linear_to_gamma_2_2_lut (*fsrc++);
+       *fdst++ = *fsrc++;
+     }
+  return samples;
+}
+
+/* Linear RGB float -> gamma-encoded R'G'B' float.
+ * Converts `samples` 3-float pixels from src to dst and returns samples. */
+static INLINE long
+conv_rgbF_linear_rgbF_gamma (unsigned char *src, unsigned char *dst, long samples)
+{
+   float *fsrc = (float *) src;
+   float *fdst = (float *) dst;
+   long n = samples;  /* long, not int: an int copy would truncate large counts */
+
+   while (n--)
+     {
+       *fdst++ = linear_to_gamma_2_2_lut (*fsrc++);
+       *fdst++ = linear_to_gamma_2_2_lut (*fsrc++);
+       *fdst++ = linear_to_gamma_2_2_lut (*fsrc++);
+     }
+  return samples;
+}
+
+
+/* Gamma-encoded R'G'B'A float -> linear RGBA float; alpha is copied unchanged.
+ * Converts `samples` 4-float pixels from src to dst and returns samples. */
+static INLINE long
+conv_rgbaF_gamma_rgbaF_linear (unsigned char *src, unsigned char *dst, long samples)
+{
+   float *fsrc = (float *) src;
+   float *fdst = (float *) dst;
+   long n = samples;  /* long, not int: an int copy would truncate large counts */
+
+   while (n--)
+     {
+       *fdst++ = gamma_2_2_to_linear_lut (*fsrc++);
+       *fdst++ = gamma_2_2_to_linear_lut (*fsrc++);
+       *fdst++ = gamma_2_2_to_linear_lut (*fsrc++);
+       *fdst++ = *fsrc++;
+     }
+  return samples;
+}
+
+/* Gamma-encoded R'G'B' float -> linear RGB float.
+ * Converts `samples` 3-float pixels from src to dst and returns samples. */
+static INLINE long
+conv_rgbF_gamma_rgbF_linear (unsigned char *src, unsigned char *dst, long samples)
+{
+   float *fsrc = (float *) src;
+   float *fdst = (float *) dst;
+   long n = samples;  /* long, not int: an int copy would truncate large counts */
+
+   while (n--)
+     {
+       *fdst++ = gamma_2_2_to_linear_lut (*fsrc++);
+       *fdst++ = gamma_2_2_to_linear_lut (*fsrc++);
+       *fdst++ = gamma_2_2_to_linear_lut (*fsrc++);
+     }
+  return samples;
+}
+/* o (a, b): pass conv_a_b to babl_conversion_new () as the "linear" function from format a to b. */
+#define o(src, dst) \
+  babl_conversion_new (src, dst, "linear", conv_ ## src ## _ ## dst, NULL)
+
+int init (void);
+/* Builds the float formats, creates and warms both gamma caches, registers the conversions; returns 0. */
+int
+init (void)
+{
+  const Babl *rgbaF_linear = babl_format_new (
+    babl_model ("RGBA"),
+    babl_type ("float"),
+    babl_component ("R"),
+    babl_component ("G"),
+    babl_component ("B"),
+    babl_component ("A"),
+    NULL);
+  const Babl *rgbAF_linear = babl_format_new (
+    babl_model ("RaGaBaA"),
+    babl_type ("float"),
+    babl_component ("Ra"),
+    babl_component ("Ga"),
+    babl_component ("Ba"),
+    babl_component ("A"),
+    NULL);
+  const Babl *rgbaF_gamma = babl_format_new (
+    babl_model ("R'G'B'A"),
+    babl_type ("float"),
+    babl_component ("R'"),
+    babl_component ("G'"),
+    babl_component ("B'"),
+    babl_component ("A"),
+    NULL);
+  const Babl *rgbAF_gamma = babl_format_new (
+    babl_model ("R'aG'aB'aA"),
+    babl_type ("float"),
+    babl_component ("R'a"),
+    babl_component ("G'a"),
+    babl_component ("B'a"),
+    babl_component ("A"),
+    NULL);
+  const Babl *rgbF_linear = babl_format_new (
+    babl_model ("RGB"),
+    babl_type ("float"),
+    babl_component ("R"),
+    babl_component ("G"),
+    babl_component ("B"),
+    NULL);
+  const Babl *rgbF_gamma = babl_format_new (
+    babl_model ("R'G'B'"),
+    babl_type ("float"),
+    babl_component ("R'"),
+    babl_component ("G'"),
+    babl_component ("B'"),
+    NULL);
+
+  {
+    float f;
+    float a;
+    /* Both caches are requested for 0.0 .. 1.0 at precision 0.00001. */
+    fast_pow = babl_lookup_new (core_lookup, NULL, 0.0, 1.0,   0.00001);
+    fast_rpow = babl_lookup_new (core_rlookup, NULL, 0.0, 1.0, 0.00001);
+    /* Warm both caches by looking up every step of 0.000001 in [0, 1). */
+    for (f = 0.0; f < 1.0; f+= 0.000001)
+      {
+        a = linear_to_gamma_2_2_lut (f);
+        a = gamma_2_2_to_linear_lut (f);
+      }
+    if (a < -10)  /* reads `a`; NOTE(review): presumably only so the warm-up results count as used */
+      f = 2;
+
+  }
+  /* Register the six conversions defined above (see the o () macro). */
+  o (rgbAF_linear, rgbAF_gamma);
+  o (rgbaF_linear, rgbAF_gamma);
+  o (rgbaF_linear, rgbaF_gamma);
+  o (rgbaF_gamma,  rgbaF_linear);
+  o (rgbF_linear, rgbF_gamma);
+  o (rgbF_gamma,  rgbF_linear);
+
+  return 0;
+}
+
-- 
2.30.2